82370d
@@ -27,9 +27,9 @@
Licensed to the Apache Software Foundation (ASF) under one
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.repl.ReplStateLogWork;
+import org.apache.hadoop.hive.ql.exec.repl.bootstrap.ReplLoadTask;
 import org.apache.hadoop.hive.ql.exec.repl.bootstrap.events.TableEvent;
 import org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.ReplicationState;
-import org.apache.hadoop.hive.ql.exec.repl.bootstrap.ReplLoadTask;
 import org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.TaskTracker;
 import org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.util.Context;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -44,12 +44,17 @@
Licensed to the Apache Software Foundation (ASF) under one
 import org.apache.hadoop.hive.ql.plan.ImportTableDesc;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
+import org.datanucleus.util.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.io.Serializable;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 
 import static org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.ReplicationState.PartitionState;
 import static org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer.isPartitioned;
@@ -173,21 +178,29 @@
private ReplicationState initialReplicationState() throws SemanticException {
   private TaskTracker forNewTable() throws Exception {
     Iterator<AddPartitionDesc> iterator = event.partitionDescriptions(tableDesc).iterator();
     while (iterator.hasNext() && tracker.canAddMoreTasks()) {
-      AddPartitionDesc addPartitionDesc = iterator.next();
-      tracker.addTask(addSinglePartition(table, addPartitionDesc));
-      if (iterator.hasNext() && !tracker.canAddMoreTasks()) {
-        ReplicationState currentReplicationState =
-                new ReplicationState(new PartitionState(table.getTableName(), addPartitionDesc));
-        updateReplicationState(currentReplicationState);
-      }
+      AddPartitionDesc currentPartitionDesc = iterator.next();
+      /*
+       the currentPartitionDesc cannot be inlined as we need the hasNext() to be evaluated post the
+       current retrieved lastReplicatedPartition
+      */
+      addPartition(iterator.hasNext(), currentPartitionDesc);
     }
     return tracker;
   }
 
+  private void addPartition(boolean hasMorePartitions, AddPartitionDesc addPartitionDesc) throws Exception {
+    tracker.addTask(tasksForAddPartition(table, addPartitionDesc));
+    if (hasMorePartitions && !tracker.canAddMoreTasks()) {
+      ReplicationState currentReplicationState =
+          new ReplicationState(new PartitionState(table.getTableName(), addPartitionDesc));
+      updateReplicationState(currentReplicationState);
+    }
+  }
+
   /**
    * returns the root task for adding a partition
    */
-  private Task<? extends Serializable> addSinglePartition(Table table,
+  private Task<? extends Serializable> tasksForAddPartition(Table table,
       AddPartitionDesc addPartitionDesc) throws MetaException, IOException, HiveException {
     AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0);
     Path sourceWarehousePartitionLocation = new Path(partSpec.getLocation());
@@ -262,50 +275,48 @@
private Path locationOnReplicaWarehouse(Table table, AddPartitionDesc.OnePartiti
 
   private TaskTracker forExistingTable(AddPartitionDesc lastPartitionReplicated) throws Exception {
     boolean encounteredTheLastReplicatedPartition = (lastPartitionReplicated == null);
+    Map<String, String> lastReplicatedPartSpec = null;
+    if (!encounteredTheLastReplicatedPartition) {
+      lastReplicatedPartSpec = lastPartitionReplicated.getPartition(0).getPartSpec();
+      LOG.info("Start processing from partition info spec : {}",
+          StringUtils.mapToString(lastReplicatedPartSpec));
+    }
+
     ReplicationSpec replicationSpec = event.replicationSpec();
-    LOG.debug("table partitioned");
+    Iterator<AddPartitionDesc> partitionIterator = event.partitionDescriptions(tableDesc).iterator();
+    while (!encounteredTheLastReplicatedPartition && partitionIterator.hasNext()) {
+      AddPartitionDesc addPartitionDesc = partitionIterator.next();
+      Map<String, String> currentSpec = addPartitionDesc.getPartition(0).getPartSpec();
+      encounteredTheLastReplicatedPartition = lastReplicatedPartSpec.equals(currentSpec);
+    }
 
-    Iterator<AddPartitionDesc> iterator = event.partitionDescriptions(tableDesc).iterator();
-    while (iterator.hasNext()) {
-      /*
-      encounteredTheLastReplicatedPartition will be set, when we break creation of partition tasks
-      for a table, as we have reached the limit of number of tasks we should create for execution.
-      in this case on the next run we have to iterate over the partitions desc to reach the last replicated
-      partition so that we can start replicating partitions after that.
-       */
-      AddPartitionDesc addPartitionDesc = iterator.next();
-      if (encounteredTheLastReplicatedPartition && tracker.canAddMoreTasks()) {
-        Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
-        Partition ptn;
-
-        if ((ptn = context.hiveDb.getPartition(table, partSpec, false)) == null) {
-          if (!replicationSpec.isMetadataOnly()) {
-            forNewTable();
-          }
-        } else {
-          // If replicating, then the partition already existing means we need to replace, maybe, if
-          // the destination ptn's repl.last.id is older than the replacement's.
-          if (replicationSpec.allowReplacementInto(ptn.getParameters())) {
-            if (replicationSpec.isMetadataOnly()) {
-              tracker.addTask(alterSinglePartition(addPartitionDesc, replicationSpec, ptn));
-              if (iterator.hasNext() && !tracker.canAddMoreTasks()) {
-                tracker.setReplicationState(
-                    new ReplicationState(new PartitionState(table.getTableName(), addPartitionDesc)
-                    )
-                );
-              }
-            } else {
-              forNewTable();
+    while (partitionIterator.hasNext() && tracker.canAddMoreTasks()) {
+      AddPartitionDesc addPartitionDesc = partitionIterator.next();
+      Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
+      Partition ptn = context.hiveDb.getPartition(table, partSpec, false);
+      if (ptn == null) {
+        if (!replicationSpec.isMetadataOnly()) {
+          addPartition(partitionIterator.hasNext(), addPartitionDesc);
+        }
+      } else {
+        // If replicating, then the partition already existing means we need to replace, maybe, if
+        // the destination ptn's repl.last.id is older than the replacement's.
+        if (replicationSpec.allowReplacementInto(ptn.getParameters())) {
+          if (replicationSpec.isMetadataOnly()) {
+            tracker.addTask(alterSinglePartition(addPartitionDesc, replicationSpec, ptn));
+            if (!tracker.canAddMoreTasks()) {
+              tracker.setReplicationState(
+                  new ReplicationState(
+                      new PartitionState(table.getTableName(), addPartitionDesc)
+                  )
+              );
             }
           } else {
-            // ignore this ptn, do nothing, not an error.
+            addPartition(partitionIterator.hasNext(), addPartitionDesc);
           }
+        } else {
+          // ignore this ptn, do nothing, not an error.
         }
-      } else {
-        Map<String, String> currentSpec = addPartitionDesc.getPartition(0).getPartSpec();
-        Map<String, String> lastReplicatedPartSpec =
-            lastPartitionReplicated.getPartition(0).getPartSpec();
-        encounteredTheLastReplicatedPartition = lastReplicatedPartSpec.equals(currentSpec);
       }
     }
     return tracker;
